# Package names
packages <- c("ggrepel", "pheatmap", "DOSE", "clusterProfiler", "ensembldb", "annotables", "apeglm", "RColorBrewer", "vsn", "DESeq2", "ggplot2", "reshape2", "gage", "gageData", "AnnotationDbi", "tidyr", "dplyr", "org.Mm.eg.db", "gplots", "GO.db", "GOstats", "AnnotationHub", "pathview", "plyr", "tidyverse", "purrr", "GSEABase", "igraph", "vissE", "patchwork", "msigdb", "tibble", "msigdbr","fgsea", "GEOquery","EnhancedVolcano","ggvenn","readxl","msigdbr","Biobase","DEGreport","viridis","ReactomePA")

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  BiocManager::install(packages[!installed_packages])
}

# Packages loading
invisible(lapply(packages, library, character.only = TRUE))
tabler<-function(res){
  res=as.data.frame(res)
 res=rownames_to_column(res,var="gene")
 sig <- res[which(res[,7]<0.05 & abs(res[,3])>2),]
#Remove unannotated genes ending in RIK
sig_red=sig[grep("*Rik", sig$gene, invert=TRUE),]
#Remove unnanotated genes starting with Gm
sig_red=sig_red[grep("Gm*", sig$gene, invert=TRUE),]
 return(sig)
}


norm_sig <- function(nc, vec, order){
    require(tibble)
  require(dplyr)
nor=dplyr::filter(nc,nc$gene %in% vec$gene)
nor=data.frame(nor) 
nor=column_to_rownames(nor,var = "gene") 
nor=nor[,order]
return(nor)
}



fc2list=function(nor, res){
ids<-bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=org.Mm.eg.db)
dedup_ids = ids[!duplicated(ids[c("ENTREZID")]),]
df2 = res[res$gene %in% dedup_ids$SYMBOL,]
df2$Y = dedup_ids$ENTREZID
# Name vector with ENTREZ ids
lis <- df2$log2FoldChange
names(lis) <- df2$Y
# omit any NA values 
lis<-na.omit(lis)
# sort the list in decreasing order (required for clusterProfiler)
lis = sort(lis, decreasing = TRUE)
return(lis)
}




gse <- function(kegg_gene_list){
    gseGO(gene = kegg_gene_list, 
                keyType = "ENTREZID",
                OrgDb = org.Mm.eg.db, 
                ont = "BP",     pAdjustMethod = "BH",eps = 0)
}




GSEAa=function(kegg_gene_list){
GSEA(kegg_gene_list,
TERM2GENE = term2gene,
TERM2NAME = term2name,
pvalueCutoff = 1.00,
minGSSize = 15,
maxGSSize = 500)
}


go <- function(sig_genes){
  enrichGO(gene = names(sig_genes), 
                universe = as.character(grcm38$entrez),
                keyType = "ENTREZID",
                OrgDb = org.Mm.eg.db, 
                ont = "BP", 
                pAdjustMethod = "BH", 
                qvalueCutoff = 0.05, 
                readable = TRUE)}



kgg <- function(kegg_gene_list){
  gseKEGG(geneList= kegg_gene_list, organism= "mmu", minGSSize    = 3,maxGSSize    = 1800, pvalueCutoff = 0.05,pAdjustMethod = "BH",by = "fgsea")
}

reactom=function(kegg_gene_list){
enrichPathway(gene=names(kegg_gene_list),pvalueCutoff=0.05, readable=T,organism = "mouse")
}



topdf=function(file, title){
pdf(paste(title,".pdf",sep = "",collapse = NULL), width=10, height=33)
print(file)
dev.off()
}


hm=function(file,title){
pheatmap(file, scale = "row", clustering_distance_rows = "correlation", fontsize = 3, color = viridis(255), cellwidth = 12.0, main = title , cluster_cols = TRUE)
}


ev=function(res){
EnhancedVolcano(res,
    lab = rownames(res),
    x = 'log2FoldChange',
    y = 'qvalue')
}
term2gene <- msigdbr(species = "Mus musculus", category = "H") %>%
select(gs_name, entrez_gene)
term2name <- msigdbr(species = "Mus musculus", category = "H") %>%
select(gs_name, gs_description) %>%
distinct()
countData <- read_excel("Gene_count_matrix_DEseq2_V4_NOT normalized.xlsx")
colnames(countData)=c("gene_id","WT0_1","WT0_2","WT0_3","WT4_1","WT4_2","WT4_3","WT8_1","WT8_2","WT8_3","WT24_1","WT24_2","WT24_3","KO0_1","KO0_2","KO0_3","KO4_1","KO4_2","KO4_3","KO8_1","KO8_2","KO8_3","KO24_1","KO24_2")
order=c("WT0_1","WT0_2","WT0_3","KO0_1","KO0_2","KO0_3")
countData=countData[,c(1:4,14:16)]
countData=aggregate(. ~gene_id , data = countData, sum)
countData=column_to_rownames(countData,'gene_id')
genotype=c(rep("WT",3),rep("KO",3))
#time=c(rep("0",3),rep("0",3))
colData=as_tibble(cbind(colnames(countData),genotype))
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
colData$genotype=relevel(factor(colData$genotype),ref = "WT")


colnames(colData)=c("samplename","genotype")
dds=DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~genotype)
## converting counts to integer mode
dds=DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
dds =dds[rowSums(counts(dds))>5,]
sizeFactors(dds)
##     WT0_1     WT0_2     WT0_3     KO0_1     KO0_2     KO0_3 
## 1.0041009 1.0308002 1.0000000 0.9779872 1.0288897 0.9734327
#pca=plotPCA(rld, intgroup=c("group"))
resultsNames(dds)
## [1] "Intercept"         "genotype_KO_vs_WT"
normalized_counts_c=counts(dds, normalized=TRUE)


normalized_counts_c <- normalized_counts_c %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
res_base=results(dds, test = "Wald")

res_base_tb=tabler(res_base)

nko_base=norm_sig(normalized_counts_c,res_base_tb, order)

ko_base_genelist=fc2list(nor = nko_base,res = res_base_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 12.73% of input gene IDs are fail to map...
gse_base <- gse(ko_base_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
##dotplot(gse_base)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

go_base=go(ko_base_genelist)

dotplot(go_base)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

kgg_base=kgg(ko_base_genelist)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/mmu/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/mmu"...
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
##dotplot(kgg_base)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

reactom_base=reactom(ko_base_genelist)

dotplot(reactom_base)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

countData <- read_excel("Gene_count_matrix_DEseq2_V4_NOT normalized.xlsx")
colnames(countData)=c("gene_id","WT0_1","WT0_2","WT0_3","WT4_1","WT4_2","WT4_3","WT8_1","WT8_2","WT8_3","WT24_1","WT24_2","WT24_3","KO0_1","KO0_2","KO0_3","KO4_1","KO4_2","KO4_3","KO8_1","KO8_2","KO8_3","KO24_1","KO24_2")
order=c("WT0_1","WT0_2","WT0_3","KO0_1","KO0_2","KO0_3","WT4_1","WT4_2","WT4_3","KO4_1","KO4_2","KO4_3","WT8_1","WT8_2","WT8_3","KO8_1","KO8_2","KO8_3","WT24_1","WT24_2","WT24_3","KO24_1","KO24_2")
countData=aggregate(. ~gene_id , data = countData, sum)
countData=column_to_rownames(countData,'gene_id')
genotype=c(rep("WT",12),rep("KO",11))
time=c(rep("0",3),rep("4",3),rep("8",3),rep("24",3),rep("0",3),rep("4",3),rep("8",3),rep("24",2))
colData=as_tibble(cbind(colnames(countData),genotype,time))

colData$genotype=relevel(factor(colData$genotype),ref = "WT")

colData$time=relevel(factor(colData$time), ref = "0")

colnames(colData)=c("samplename","genotype","time")
dds=DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~genotype+time+time:genotype)
## converting counts to integer mode
dds=DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
dds =dds[rowSums(counts(dds))>5,]
sizeFactors(dds)
##     WT0_1     WT0_2     WT0_3     WT4_1     WT4_2     WT4_3     WT8_1     WT8_2 
## 1.3795048 1.4012575 1.3515025 0.8366702 0.8269461 0.9382454 0.8126441 0.8725579 
##     WT8_3    WT24_1    WT24_2    WT24_3     KO0_1     KO0_2     KO0_3     KO4_1 
## 0.8297980 1.0712922 1.1398486 1.0105491 1.3278793 1.4136084 1.2995531 0.7855650 
##     KO4_2     KO4_3     KO8_1     KO8_2     KO8_3    KO24_1    KO24_2 
## 0.9133560 0.7980808 0.9204218 0.8998345 0.8960029 1.1506075 1.0911143
#pca=plotPCA(rld, intgroup=c("group"))
resultsNames(dds)
## [1] "Intercept"         "genotype_KO_vs_WT" "time_24_vs_0"     
## [4] "time_4_vs_0"       "time_8_vs_0"       "genotypeKO.time24"
## [7] "genotypeKO.time4"  "genotypeKO.time8"
normalized_counts_c=counts(dds, normalized=TRUE)


normalized_counts_c <- normalized_counts_c %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()
# KO_4h_vs_0h : 4h vs 0h in KO.
# It compares the expression levels between KO and WT at 4 hours relative to time 0
# The ko_4h_0h  takes into account the change in gene expression from time 0 to 4 hours for both genotypes
# and then estimates the differential expression between KO and WT specifically at 4 hours, relative to time 0.
ko_4h_0h=results(dds, contrast=list(c("time_4_vs_0","genotypeKO.time4")), test="Wald")
write.csv(as.data.frame(ko_4h_0h),"KO_time_4h_vs_0h.csv")

ko_4h_0h_tb=tabler(ko_4h_0h)

nko_4h_0h=norm_sig(normalized_counts_c,ko_4h_0h_tb,order)

ko_4h_0_genelist=fc2list(nor = nko_4h_0h,res = ko_4h_0h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 12.01% of input gene IDs are fail to map...
GSEAa_4h_0=GSEAa(ko_4h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
GSEAa_4h_0_<- GSEAa_4h_0
GSEAa_4h_0_@result$Description <- GSEAa_4h_0_@result$ID


gse4h_0 <- gse(ko_4h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## leading edge analysis...
## done...
go4h_0=go(ko_4h_0_genelist)


kgg_4h_0=kgg(ko_4h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
reactom_4h_0=reactom(ko_4h_0_genelist)


# 
# 
# mydf <- data.frame(Entrez=names(ko_4h_0_genelist),FC=as.numeric(ko_4h_0_genelist))
# mydf <- mydf[abs(mydf$FC) > 2,]
# mydf$group <- "upregulated"
# mydf$group[mydf$FC < 0] <- "downregulated"
# mydf$othergroup <- "C"
# mydf$othergroup[abs(mydf$FC) > 2] <- "A"
# 
# formula_res <- compareCluster(Entrez~group+othergroup, data=mydf, fun="enrichGO", OrgDb = org.Mm.eg.db)
#   
# 
# 
# head(formula_res)
# 
# dotplot(formula_res, x="group") + facet_grid(~othergroup)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# 




# KO_8h_vs_0h : 8h vs 0h in KO.
# It compares the expression levels between KO and WT at 8 hours relative to time 0
ko_8h_0h=results(dds, contrast=list(c("time_8_vs_0","genotypeKO.time8")), test="Wald")
write.csv(ko_8h_0h,"KO_time_8h_vs_0h.csv")


ko_8h_0h_tb=tabler(ko_8h_0h)

nko8h_0h=norm_sig(normalized_counts_c,ko_8h_0h_tb, order)







ko_8h_0_genelist=fc2list(nor = nko8h_0h,res = ko_8h_0h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 10.11% of input gene IDs are fail to map...
gse8h_0 <- gse(ko_8h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## leading edge analysis...
## done...
GSEAa_8h_0=GSEAa(ko_8h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
GSEAa_8h_0_<- GSEAa_8h_0
GSEAa_8h_0_@result$Description <- GSEAa_8h_0_@result$ID


go8h_0=go(ko_8h_0_genelist)



kgg_8h_0=kgg(ko_8h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
reactom_8h_0=reactom(ko_8h_0_genelist)


# 
# 
# mydf2 <- data.frame(Entrez=names(ko_8h_0_genelist),FC=as.numeric(ko_8h_0_genelist))
# mydf2 <- mydf[abs(mydf$FC) > 2,]
# mydf2$group <- "upregulated"
# mydf2$group[mydf$FC < 0] <- "downregulated"
# mydf2$othergroup <- "A"
# mydf2$othergroup[abs(mydf$FC) > 2] <- "B"
# 
# mydf3=rbind(mydf,mydf2)
# 
# formula_res <- compareCluster(Entrez~group+othergroup, data=mydf2, fun="gseGO", OrgDb = org.Mm.eg.db)
#   
# head(formula_res)
# 
# 
# 
# dotplot(formula_res, x="group") + facet_grid(~othergroup)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 



# KO_24h_vs_0h : 24h vs 0h in KO.
# It compares the expression levels between KO and WT at 24 hours relative to time 0
ko_24h_0h=results(dds, contrast=list(c("time_24_vs_0","genotypeKO.time24")), test="Wald")
write.csv(ko_24h_0h,"KO_time_24h_vs_0h.csv")

ko_24h_0h_tb=tabler(ko_24h_0h)

nko_24h_0h=norm_sig(normalized_counts_c,ko_24h_0h_tb, order)


ko_24h_0_genelist=fc2list(nor = nko_24h_0h,res = ko_24h_0h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", : 10%
## of input gene IDs are fail to map...
gse24h_0 <- gse(ko_24h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
GSEAa_24h_0=GSEAa(ko_24h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
GSEAa_24h_0_<- GSEAa_24h_0
GSEAa_24h_0_@result$Description <- GSEAa_24h_0_@result$ID





go24h_0=go(ko_24h_0_genelist)

kgg_24_0=kgg(ko_24h_0_genelist)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
reactom_24h_0=reactom(ko_24h_0_genelist)

# 
# 
# mydf4 <- data.frame(Entrez=names(ko_24h_0_genelist),FC=as.numeric(ko_24h_0_genelist))
# mydf4 <- mydf[abs(mydf$FC) > 2,]
# mydf4$group <- "upregulated"
# mydf4$group[mydf$FC < 0] <- "downregulated"
# mydf4$othergroup <- "D"
# mydf4$othergroup[abs(mydf$FC) > 2] <- "E"
# 
# mydf5=rbind(mydf,mydf2,mydf4)
# 
# formula_res <- compareCluster(Entrez~othergroup:group, data=mydf5, fun="enrichGO", organism="mmu")
#   
# head(formula_res)
# 
# 
# 
# dotplot(formula_res, x="group") + facet_grid(~othergroup)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# 








# 
# 
# gl1=list("4_0"=names(ko_4h_0_genelist),"8_0"=names(ko_8h_0_genelist),"24_0"=names(ko_24h_0_genelist))
# 
# ck1 <- compareCluster(geneCluster = gl1, fun = enrichGO, OrgDb = org.Mm.eg.db)
# 
# 
# dotplot(ck1, showCategory=25)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# #ck2k <- compareCluster(geneCluster = gl1, fun = gseKEGG, organism="mmu")
# 
# ck2 <- compareCluster(geneCluster = gl1, fun = enrichPathway, organism="mouse")
# 
# 
# dotplot(ck2,showCategory=25)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))






go_mer=merge_result(list("4"=go4h_0,"8"=go8h_0,"24"=go24h_0))

dotplot(go_mer, showCategory=25, font.size=7)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

gsea_mer=merge_result(list("4"=gse4h_0,"8"=gse8h_0,"24"=gse24h_0))

dotplot(gsea_mer, showCategory=25, x="NES",font.size=7)+facet_grid(~Cluster)

GSEA_mer=merge_result(list("4"=GSEAa_4h_0_,"8"=GSEAa_8h_0_,"24"=GSEAa_24h_0_))

dotplot(GSEA_mer, showCategory=25, x="NES", font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

kgg_mer=merge_result(list("4"=kgg_4h_0,"8"=kgg_8h_0,"24"=kgg_24_0))

dotplot(kgg_mer, showCategory=25, x="NES", font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

reac_mer=merge_result(list("4"=reactom_4h_0,"8"=reactom_8h_0,"24"=reactom_24h_0))


dotplot(reac_mer, showCategory=25, font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

# Genotype specific effect at 4h : Main effect + genotypeKO.time4
# It compares the expression levels between KO and WT directly at 4 hours, considering the genotype-specific effect.
# It identifies the genes that are differentially expressed between the KO and WT genotypes specifically at 4 hours,
# without considering the change from time 0. Instead, it focuses on the difference between KO and WT directly at 4 hours,
# incorporating both the main effect of genotype and the interaction effect at 4 hours.
ko_4h=results(dds, contrast=list(c("genotype_KO_vs_WT","genotypeKO.time4")), test="Wald")
write.csv(ko_4h,"KO_4h.csv")


ko_4h_tb=tabler(ko_4h)

nko_4h=norm_sig(normalized_counts_c,ko_4h_tb, order)


ko_4h_genelist=fc2list(nor = nko_4h,res = ko_4h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 16.99% of input gene IDs are fail to map...
go4h=go(ko_4h_genelist)

gse4h <- gse(ko_4h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_4h=GSEAa(ko_4h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_4h_<- GSEAa_4h
GSEAa_4h_@result$Description <- GSEAa_4h_@result$ID



kgg_4h=kgg(ko_4h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
reactom_4h=reactom(ko_4h_genelist)






# Genotype specific effect at 8h : Main effect + genotypeKO.time8
ko_8h=results(dds, contrast=list(c("genotype_KO_vs_WT","genotypeKO.time8")), test="Wald")
write.csv(ko_8h,"KO_8h.csv")


ko_8h_tb=tabler(ko_8h)

nko_8h=norm_sig(normalized_counts_c,ko_8h_tb,order)


ko_8h_genelist=fc2list(nor = nko_8h,res = ko_8h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 14.36% of input gene IDs are fail to map...
go8h=go(ko_8h_genelist)

gse8h <- gse(ko_8h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_8h=GSEAa(ko_8h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_8h_<- GSEAa_8h
GSEAa_8h_@result$Description <- GSEAa_8h_@result$ID


kgg_8h=kgg(ko_8h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
reactom_8h=reactom(ko_8h_genelist)



# Genotype specific effect at 24h : Main effect + genotypeKO.time24
ko_24h=results(dds, contrast=list(c("genotype_KO_vs_WT","genotypeKO.time24")), test="Wald")
write.csv(ko_24h,"KO_24h.csv")

ko_24h_tb=tabler(ko_24h)

nko_24h=norm_sig(normalized_counts_c,ko_24h_tb,order)

ko_24h_genelist=fc2list(nor = nko_24h,res = ko_24h_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", :
## 15.56% of input gene IDs are fail to map...
gse24h <- gse(ko_24h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_24h=GSEAa(ko_24h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
GSEAa_24h_<- GSEAa_24h
GSEAa_24h_@result$Description <- GSEAa_24h_@result$ID


go24h=go(ko_24h_genelist)


kgg_24h=kgg(ko_24h_genelist)
## preparing geneSet collections...
## GSEA analysis...
## no term enriched under specific pvalueCutoff...
reactom_24h=reactom(ko_24h_genelist)










# 
# 
# gl1=list("4_0"=names(ko_4h_0_genelist),"8_0"=names(ko_8h_0_genelist),"24_0"=names(ko_24h_0_genelist))
# 
# ck1 <- compareCluster(geneCluster = gl1, fun = enrichGO, OrgDb = org.Mm.eg.db)
# 
# 
# dotplot(ck1, showCategory=25)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# #ck2k <- compareCluster(geneCluster = gl1, fun = gseKEGG, organism="mmu")
# 
# ck2 <- compareCluster(geneCluster = gl1, fun = enrichPathway, organism="mouse")
# 
# 
# dotplot(ck2,showCategory=25)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))









go_mer=merge_result(list("4"=go4h,"8"=go8h,"24"=go24h))

dotplot(go_mer, showCategory=25, font.size=7)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

gsea_mer=merge_result(list("4"=gse4h,"8"=gse8h,"24"=gse24h))

##dotplot(gsea_mer, showCategory=25, font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))



kgg_mer=merge_result(list("4"=kgg_4h,"8"=kgg_8h,"24"=kgg_24h))

dotplot(kgg_mer, showCategory=25, x="NES", font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

reac_mer=merge_result(list("4"=reactom_4h,"8"=reactom_8h,"24"=reactom_24h))


dotplot(reac_mer, showCategory=50, font.size=7)+facet_grid(~Cluster)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

LRT model for all time points

dds=DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~genotype+time+time:genotype)
## converting counts to integer mode
dds=DESeq(dds, test = "LRT",reduced = ~time)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
dds =dds[rowSums(counts(dds))>5,]
sizeFactors(dds)
##     WT0_1     WT0_2     WT0_3     WT4_1     WT4_2     WT4_3     WT8_1     WT8_2 
## 1.3795048 1.4012575 1.3515025 0.8366702 0.8269461 0.9382454 0.8126441 0.8725579 
##     WT8_3    WT24_1    WT24_2    WT24_3     KO0_1     KO0_2     KO0_3     KO4_1 
## 0.8297980 1.0712922 1.1398486 1.0105491 1.3278793 1.4136084 1.2995531 0.7855650 
##     KO4_2     KO4_3     KO8_1     KO8_2     KO8_3    KO24_1    KO24_2 
## 0.9133560 0.7980808 0.9204218 0.8998345 0.8960029 1.1506075 1.0911143
normalized_counts_c=counts(dds, normalized=TRUE)


normalized_counts_c <- normalized_counts_c %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()



res_table=as.data.frame(results(dds,contrast=list(c("time_24_vs_0","time_4_vs_0","time_8_vs_0","genotypeKO.time24","genotypeKO.time4","genotypeKO.time8"))))




LRT_tb=tabler(res_table)

nLRT=norm_sig(normalized_counts_c,LRT_tb,order)


LRT_genelist=fc2list(nor = nLRT,res = LRT_tb )
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(rownames(nor), fromType = "SYMBOL", toType = "ENTREZID", : 9%
## of input gene IDs are fail to map...
goLRT=go(LRT_genelist)

dotplot(goLRT, showCategory=25, font.size=7)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

gseLRT <- gse(LRT_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
dotplot(gseLRT, showCategory=25, font.size=7, x="NES")+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

GSEAa_LRT=GSEAa(LRT_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
GSEAa_LRT_<- GSEAa_LRT
GSEAa_LRT_@result$Description <- GSEAa_LRT_@result$ID




kggLRT_=kgg(LRT_genelist)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
dotplot(kggLRT_, showCategory=25, x="NES")+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

reactomLRT_=reactom(LRT_genelist)

dotplot(reactomLRT_, showCategory=25)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

###pdf("PCA.pdf")
###print(pca)
# dev.off()
# 
# 
# pdf("dispest.pdf")
# print(plotDispEsts(dds))
# dev.off()
pheatmap(nLRT, scale = "row", clustering_distance_rows = "correlation", fontsize = 3, color = viridis(255), cellwidth = 12.0, main = "All times", cluster_cols = TRUE)

# a=sort(cutree(phea$tree_row, k=2))
# 
# 
# write.csv(names(a[a==1]),"WT_only.csv")
# 
# 
# write.csv(names(a[a==2]),"KO_only.csv")
# 
# idx = grcm38$symbol %in% sig$gene
# 
# ids <- grcm38[idx, ]
# 
# non_duplicates <- which(duplicated(ids$symbol) == FALSE)
# 
# ids <- ids[non_duplicates, ] 
# 
# res_ids <- inner_join(sig, ids, by=c("gene"="symbol"))     
# 
# all_genes <- as.character(res_ids$entrez)
# 
# 
# 
# 
# sig_genes <- as.character(res_ids$entrez)
# 
# 
# res_entrez <- filter(res_ids, entrez != "NA")
# res_entrez <- res_entrez[which(duplicated(res_entrez$entrez) == F), ]
# all_foldchanges <- res_entrez$log2FoldChange
# names(all_foldchanges) <- res_entrez$entrez
# all_foldchanges <- sort(all_foldchanges, decreasing = TRUE)
# 
# symbol_foldchanges=sig$log2FoldChange
# 
# names(symbol_foldchanges)=sig$gene
# 
# 
# all_foldchanges=sort(all_foldchanges,decreasing=TRUE)
# 
# 
# 
# go <- enrichGO(gene = sig_genes, 
#                 universe = as.character(grcm38$entrez),
#                 keyType = "ENTREZID",
#                 OrgDb = org.Mm.eg.db, 
#                 ont = "BP", 
#                 pAdjustMethod = "BH", 
#                 qvalueCutoff = 0.05, 
#                 readable = TRUE)
# 
# 
# 
# 
# 
# 
# go_dp=dotplot(go, showCategory=60)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# 
# up=sig[sig[,3]>0,]
# 
# 
# down=sig[sig[,3]<0,]
# 
# up_entrez <- as.character(up$entrez)
# 
# down_entrez =  as.character(down$entrez)
# 
# upg=res_entrez[res_entrez$entrez %in% up_entrez,]
# 
# up_genes=upg$log2FoldChange
# names(up_genes) <- upg$entrez
# up_genes <- sort(up_genes, decreasing = TRUE)
# 
# downg=res_entrez[res_entrez$entrez %in% down_entrez,]
# 
# down_genes=downg$log2FoldChange
# 
# names(down_genes) <- downg$entrez
# 
# down_genes <- sort(down_genes, decreasing = TRUE)
# 
# 
# 
# 
# go_up <- enrichGO(gene = up_entrez, 
#                 universe = all_genes,
#                 keyType = "ENTREZID",
#                 OrgDb = org.Mm.eg.db, 
#                 ont = "BP", 
#                 pAdjustMethod = "BH", 
#                 qvalueCutoff = 0.05, 
#                 readable = TRUE,)
# 
# 
# 
# 
# 
# go_down <- enrichGO(gene = down_entrez, 
#                 universe = all_genes,
#                 keyType = "ENTREZID",
#                 OrgDb = org.Mm.eg.db, 
#                 ont = "BP", 
#                 pAdjustMethod = "BH", 
#                 qvalueCutoff = 0.05, 
#                 readable = TRUE)
# 
# 
# 
# 
# 
# 
# 
# 
# netplot=cnetplot(go, foldchange=all_foldchanges,fixed=F)
# 
# 
# ids<-bitr(rownames(norm_sig), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=org.Mm.eg.db)
# 
# 
# dedup_ids = ids[!duplicated(ids[c("ENTREZID")]),]
# 
# 
# df2 = res_table_tb[res_table_tb$gene %in% dedup_ids$SYMBOL,]
# 
# df2$Y = dedup_ids$ENTREZID
# 
# 
# # Name vector with ENTREZ ids
# kegg_gene_list <- df2$log2FoldChange
# 
# names(kegg_gene_list) <- df2$Y
# 
# # omit any NA values 
# kegg_gene_list<-na.omit(kegg_gene_list)
# 
# # sort the list in decreasing order (required for clusterProfiler)
# kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
# 
# 
# 
# gse <- gseGO(gene = kegg_gene_list, 
#                 keyType = "ENTREZID",
#                 OrgDb = org.Mm.eg.db, 
#                 ont = c("BP","MF"),     pAdjustMethod = "BH",eps = 0)
# 
# gsea=dotplot(gse, showCategory=50, split=".sign") + facet_grid(.~.sign)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# gsea1=dotplot(gse, showCategory=25, split=".sign", x="NES", decreasing = TRUE,font.size=6)+ scale_size_area(max_size = 10)
# ++scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# 
# #lapply(1:length(gse$Description), function(i) {gseaplot(gse, by = "all", title = gse$Description[i], geneSetID = i)})
# 
# 
# 
# #pdf("gseas.pdf",width = 10)
# #print(lapply(1:length(gse$Description), function(i) {gseaplot(gse, by = "all", title = gse$Description[i], geneSetID = i)}))
# #dev.off()
# 
# 
# kgg <- gseKEGG(geneList= kegg_gene_list, organism= "mmu", minGSSize    = 3,maxGSSize    = 1800, pvalueCutoff = 0.05,pAdjustMethod = "none",by = "fgsea")
# 
# 
# 
# dotplot(kgg , showCategory=25, split=".sign", x="NES", decreasing = TRUE,font.size=6)+scale_y_discrete(labels=function(x) str_wrap(x, width=80))

# 
# 
# 
# 
# 
# #a=lapply(1:10, function(x) {
#   #pathview(gene.data = foldchanges, pathway.id = kgg$ID[x], species = "mmu")})
# 
# 
# 
# pdf("GO_dotplot.pdf", width=10, height=33)
# print(go_dp)
# dev.off()
# 
# 
# 
# pdf("gsea_dotplot.pdf", width=10, height=33)
# print(gsea)
# dev.off()
# 
# 
# pdf("kegg_dotplot.pdf", width=10, height=35)
# print(kgg)
# dev.off()
# 
# 
# pdf("network.pdf", width=27, height=20)
# print(netplot)
# dev.off()
# 
# 
# normalized_counts=rlog(counts(dds))
# 
# 
# 
# 
# 
# normalized_counts <- normalized_counts %>%
#   data.frame() %>%
#   rownames_to_column(var="gene") %>%
#   as_tibble()
# 
# normalized_counts$entrez=mapIds(org.Mm.eg.db,key,keys = normalized_counts$gene,column = "ENTREZID", keytype = "SYMBOL", multiVals = "first")
# 
# normalized_counts$gene=normalized_counts$entrez
# 
# normalized_counts=normalized_counts[,1:24]
# 
# normalized_counts=aggregate(. ~gene , data = normalized_counts, sum)
# 
# normalized_counts <- normalized_counts %>%
#   data.frame() %>%
#  column_to_rownames(var = "gene")
# 
# 
# require(Biobase)
# normalized_counts1<-new("ExpressionSet", exprs=as.matrix(normalized_counts))
# 
# 
# normalized_counts1$time= colData$group
# 
# 
# 
# 
# pathwaysDF <- msigdbr("mouse", category=c("H"))
# pathways <- split(as.character(pathwaysDF$entrez_gene), pathwaysDF$gs_name)
# 
# 
# set.seed(1)
# gesecaRes <- geseca(pathways, exprs(normalized_counts1), minSize = 10, maxSize = 1000, eps = 0,nPermSimple = 10000)
# 
# geseres=plotGesecaTable(gesecaRes |> head(5), pathways,E=exprs(normalized_counts1),)
# 
# IFNG=plotCoregulationProfile(pathway=pathways[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]],
#                         E=exprs(normalized_counts1), conditions=normalized_counts1$time)
# 
# 
# INF=plotCoregulationProfile(pathway=pathways[["HALLMARK_INFLAMMATORY_RESPONSE"]],
#                         E=exprs(normalized_counts1), conditions=normalized_counts1$time)
# 
# 
# TNF_NFKB=plotCoregulationProfile(pathway=pathways[["HALLMARK_TNFA_SIGNALING_VIA_NFKB"]],
#                         E=exprs(normalized_counts1), conditions=normalized_counts1$time)
# 
# 
# TNFA=plotCoregulationProfile(pathway=pathways[["HALLMARK_INTERFERON_ALPHA_RESPONSE"]],
#                         E=exprs(normalized_counts1), conditions=normalized_counts1$time)
# 
# E2F=plotCoregulationProfile(pathway=pathways[["HALLMARK_E2F_TARGETS"]],
#                         E=exprs(normalized_counts1), conditions=normalized_counts1$time)
# 
# 
# 
# 
# 
# 
# 
# fgseaRes <- fgsea(pathways = pathways, 
#                   stats    = all_foldchanges,)
# 
# 
# fgseaResTidy <- fgseaRes %>%
#   as_tibble() %>%
#   arrange(desc(NES))
# 
# # Show in a nice table:
# fgseaResTidy %>% 
#   dplyr::select(-leadingEdge, -ES) %>% 
#   arrange(padj) %>% 
#   DT::datatable()
# 
# 
# 
# fgsearesgraph=ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
#   geom_col(color=fgseaResTidy$NES>0) +
#   coord_flip() +
#   labs(x="Pathway", y="Normalized Enrichment Score",
#        title="Hallmark pathways NES from GSEA") + 
#   theme_minimal()
# 
# 
# 
# pdf("IFNG.pdf",width = 10)
# print(IFNG)
# dev.off()
# 
# 
# pdf("INF.pdf",width = 10)
# print(INF)
# dev.off()
# 
# 
# pdf("TNF_NFKB.pdf",width = 10)
# print(TNF_NFKB)
# dev.off()
# 
# 
# pdf("TNFA.pdf",width = 10)
# print(TNFA)
# dev.off()
# 
# 
# pdf("E2F.pdf",width = 10)
# print(E2F)
# dev.off()
# 
# 
# pdf("geseca.pdf",width = 10)
# print(geseres)
# dev.off()
# 
# 
# 
# pdf("fgsearesgraph.pdf",width = 10)
# print(fgsearesgraph)
# dev.off()
# 
# 
#